---
title: "The Gender Gap in Peace and Conflict Journals, 2000–2024"
subtitle: "Replication Code: Citation Analysis for Five Journals"
author:
  - Daina Chiba
  - Wakako Maekawa
date: today
format:
  html: 
    toc: true
    page-layout: full
    embed-resources: true
code-fold: false
execute:
  message: false
  warning: false
---

# Preparation

Install and load necessary packages.

```{r}
## Install pacman if not already installed
if (!require("pacman")) install.packages("pacman")

## Unload any loaded packages
pacman::p_unload(pacman::p_loaded(), character.only = TRUE)

## Delete everything in the memory
rm(list=ls(all=TRUE))

## Load packages
pacman::p_load(tidyverse, stargazer, tidylog, ggeffects, 
               fuzzyjoin, stringdist, stringr, readxl, quanteda, 
               MASS, sandwich, lmtest)
```

Define colors

```{r}
## Define colors
col_m <- "#5DC863FF"
col_f <- "#440154FF"
col_u <- rgb(205, 205, 205, maxColorValue = 255)
```

Load data

```{r}
df <- readRDS("data_citations_five_journals.rds")
```

Order journals by age

```{r}
df <- df |> 
  mutate(journal = fct_relevel(journal, "ISQ", "JCR", "JPR", "CMPS", "II"))
```

# Descriptive statistics

```{r}
summary(df)
```

```{r}
median(df$Citation_Count)
```

```{r}
median(df$per_female)
```

```{r}
median(df$fem_topic)
```

```{r}
with(df, cor.test(per_female, fem_topic, method = "pearson"))
```

# Effect of Female Ratio on Citations

```{r}
fit1_fe <- glm.nb(Citation_Count ~ 0 + author_num + fem_topic + 
                    per_female + journal + offset(log(age)), 
                  data = df)

fit1_ji <- glm.nb(Citation_Count ~ 0 + author_num + fem_topic + 
                    per_female * journal + offset(log(age)), 
                  data = df)

fit1_fe_se <- sqrt(diag(vcovCL(fit1_fe, cluster = df $ journal)))
fit1_ji_se <- sqrt(diag(vcovCL(fit1_ji, cluster = df $ journal)))
model_list <- list(fit1_fe, fit1_ji)
se_list <- list(fit1_fe_se, fit1_ji_se)

fit1_fe_cl <- coeftest(fit1_fe, vcov = vcovCL, cluster = df $ journal)
fit1_ji_cl <- coeftest(fit1_ji, vcov = vcovCL, cluster = df $ journal)
```

```{r}
#| eval: false
#| echo: false
#| warning: false
stargazer(fit1_fe_cl, fit1_ji_cl, type = "text")
```

```{r}
#| warning: false
#| results: asis

stargazer(model_list, 
          se = se_list,
          title = "Negative binomial models of citations (five journals)",
          order = c("author_num", 
                    "fem_topic", 
                    "^journal", 
                    "^per_female"
                    ),
          covariate.labels = c("Number of authors", 
                               "Feminine topics", 
                               "ISQ", "JCR", "JPR", "CMPS", "II",
                               "Ratio of female authors", 
                               "Ratio of female authors x JCR",
                               "Ratio of female authors x JPR",
                               "Ratio of female authors x CMPS",
                               "Ratio of female authors x II"),
          type = "html")
```

```{r}
#| warning: false
#| echo: false
#| eval: false
stargazer(model_list, 
          se = se_list,
          title = "Negative binomial models of citations (five journals)",
          order = c("author_num", 
                    "fem_topic", 
                    "^journal", 
                    "^per_female"
                    ),
          covariate.labels = c("Number of authors", 
                               "Feminine topics", 
                               "ISQ", "JCR", "JPR", "CMPS", "II",
                               "Ratio of female authors", 
                               "Ratio of female authors x JCR",
                               "Ratio of female authors x JPR",
                               "Ratio of female authors x CMPS",
                               "Ratio of female authors x II"),
          type = "latex")
```

## Predictive margins

### ISQ

```{r}
pred_isq <- predict_response(model = fit1_ji, 
                             terms = c("per_female"), 
                             condition = c(journal = "ISQ", 
                                           age = mean(df $ age),
                                           fem_topic = median(df $ fem_topic),
                                           author_num = median(df $ author_num)),
                             vcov = vcovCL(fit1_ji, cluster = df$journal))
pred_isq
```

### JCR

```{r}
pred_jcr <- predict_response(model = fit1_ji, 
                             terms = c("per_female"), 
                             condition = c(journal = "JCR", 
                                           age = mean(df $ age),
                                           fem_topic = median(df $ fem_topic),
                                           author_num = median(df $ author_num)),
                             vcov = vcovCL(fit1_ji, cluster = df$journal))
pred_jcr
```

### JPR

```{r}
pred_jpr <- predict_response(model = fit1_ji, 
                             terms = c("per_female"), 
                             condition = c(journal = "JPR", 
                                           age = mean(df $ age),
                                           fem_topic = median(df $ fem_topic),
                                           author_num = median(df $ author_num)),
                             vcov = vcovCL(fit1_ji, cluster = df$journal))
pred_jpr
```

### CMPS

```{r}
pred_cmps <- predict_response(model = fit1_ji, 
                              terms = c("per_female"), 
                             condition = c(journal = "CMPS", 
                                           age = mean(df $ age),
                                           fem_topic = median(df $ fem_topic),
                                           author_num = median(df $ author_num)),
                              vcov = vcovCL(fit1_ji, cluster = df$journal))
pred_cmps
```

### II

```{r}
pred_ii <- predict_response(model = fit1_ji, 
                            terms = c("per_female"), 
                             condition = c(journal = "II", 
                                           age = mean(df $ age),
                                           fem_topic = median(df $ fem_topic),
                                           author_num = median(df $ author_num)),
                            vcov = vcovCL(fit1_ji, cluster = df$journal))
pred_ii
```

Plot predictive margins

```{r}
pred_five <- bind_rows(
  data.frame(pred_isq) |> mutate(journal = "ISQ"),
  data.frame(pred_jcr) |> mutate(journal = "JCR"),
  data.frame(pred_jpr) |> mutate(journal = "JPR"),
  data.frame(pred_cmps) |> mutate(journal = "CMPS"),
  data.frame(pred_ii) |> mutate(journal = "II")
  ) |> 
  mutate(journal = factor(journal, levels = c("ISQ", "JCR", "JPR", "CMPS", "II")))

pred_five <- pred_five |> 
  group_by(journal) |> 
  mutate(
    effect = case_when(
      row_number() == 1 ~ {
        diff <- last(predicted) - first(predicted)
        case_when(
          diff > 1 ~ "positive", 
          diff < -1 ~ "negative",
          TRUE ~ "zero"
        )
      },
      TRUE ~ NA_character_
    )
  ) |> 
  fill(effect, .direction = "downup") |>
  ungroup() |> 
  mutate(effect = factor(effect, levels = c("positive", "zero", "negative")))

table(pred_five $ effect)
```


## Figure 5 in the paper

```{r}
ggplot(pred_five, aes(x = x, y = predicted, color = effect)) +
  geom_line(linewidth = 1) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = effect), 
              alpha = 0.3, color = NA) + 
  scale_color_manual(values = c(col_f, col_u, col_m)) + 
  scale_fill_manual(values = c(col_f, col_u, col_m)) + 
  scale_y_continuous(name = "Predicted citation count", limits = c(0, NA)) + 
  scale_x_continuous(name = "Ratio of female authors") + 
  theme_minimal() + 
  facet_wrap(~ journal, ncol = 5) + 
  theme(legend.position = "none", 
        axis.text.x = element_text(angle = 60, size = 9),
        axis.text.y = element_text(size = 11),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        legend.title = element_text(size = 15),
        legend.text = element_text(size = 15)
        )
```

# Effect of Female Ratio on Citations by Time

We run a separate model for each journal. In doing so, we interact per_female with time since 2020. We use time since 2020 rather than the raw year for numerical stability (2020\^2 = 4080400).

```{r}
model_int_list <- list()
se_int_list <- list()
eff_int_list <- list()
pred_count <- c()

for(j in levels(df $ journal)){
  journal_data <- df |> filter(journal == j)
  
  model <- glm.nb(
    Citation_Count ~ author_num + fem_topic + 
      per_female * (time + I(time^2) + I(time^3)) + offset(log(age)), 
    data = journal_data
  )
  # Use HC1 robust SE
  model_int_list[[j]] <- model
  se_int_list[[j]] <- sqrt(diag(vcovHC(model, type = "HC1")))
  
  # Predicted values
  pred <- ggpredict(model, terms = c("time [all]", "per_female [0, 0.5, 1]"),
          condition = c(age = median(journal_data $ age), 
                        author_num = median(journal_data $ author_num),
                        fem_topic = median(journal_data $ fem_topic)),
  vcov.fun = "vcovHC",
  vcov.type = "HC1")
  pred_count <- bind_rows(pred_count, data.frame(pred) |> mutate(journal = j))
}
```

```{r}
#| warning: false
#| echo: false
#| eval: false

stargazer(model_int_list, se = se_int_list, type = "text")
```

```{r}
#| warning: false
#| echo: false
#| eval: false

stargazer(model_int_list, se = se_int_list, type = "latex")
```


## Figure 6 in the paper


```{r}
pred_count <- pred_count |> 
  mutate(journal = factor(journal, levels = c("ISQ", "JCR", "JPR", "CMPS", "II")))

ggplot(data = pred_count, 
       mapping = aes(x = x, y = predicted, group = group, color = group)) + 
  geom_line(linewidth = 1) + 
  geom_ribbon(aes(ymin = conf.low, 
                  ymax = conf.high, fill = group), 
              alpha = 0.3, color = NA) + 
  facet_wrap(~ journal, ncol = 5) + 
  scale_y_continuous(name = "Predicted citation count", limits = c(0, 170)) + 
  scale_x_continuous(name = "Year", 
                     breaks = c(0, 5, 10, 15, 20), 
                     labels = c(2000, 2005, 2010, 2015, 2020)) + 
  scale_color_manual(values = c(col_m, col_u, col_f)) + 
  scale_fill_manual(values = c(col_m, col_u, col_f)) + 
  labs(color = "Ratio of female authors", 
       fill = "Ratio of female authors") + 
  theme_minimal() + 
  theme(legend.position = "bottom", 
        axis.text.x = element_text(angle = 60, size = 9),
        axis.text.y = element_text(size = 11),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        legend.title = element_text(size = 15),
        legend.text = element_text(size = 15)
        )
```
